/*    Copyright 2010 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.subcommands;

import java.util.ArrayList;
import java.util.List;

import mosdi.fa.Alphabet;
import mosdi.fa.CDFA;
import mosdi.fa.DFAFactory;
import mosdi.fa.FiniteMemoryTextModel;
import mosdi.fa.GeneralizedString;
import mosdi.fa.IIDTextModel;
import mosdi.paa.ClumpSizeCalculator;
import mosdi.util.FileUtils;
import mosdi.util.Iupac;
import mosdi.util.Log;
import mosdi.util.SequenceUtils;

public class ExpClumpSizeSubcommand extends Subcommand {

	@Override
	public String usage() {
		return
		super.usage()+" [options] <iupac-pattern>\n" +
		"\n" +
		"Options:\n" +
		"  -F: read patterns from file\n" +
		"  -r: simultaneously consider reverse complementary motif\n" +
		"  -q <q-gram-table-file>: Estimate text model from given q-gram table (default: uniform i.i.d.)\n" +
		"                          A q-gram table can be created by using \"mosdi-utils count-qgrams\".";
	}

	@Override
	public String description() {
		return "Calculates a motif's expected clump size.";
	}

	@Override
	public String name() {
		return "exp-clump-size";
	}

	@Override
	public int run(String[] args) {
		parseOptions(args, 1, "Frq:");

		// Option dependencies
		// -- none --

		// Mandatory arguments
		String pattern = getStringArgument(0);
			
		// Options
		boolean considerReverse = getBooleanOption("r", false);
		boolean readPatternsFromFile = getBooleanOption("F", false);
		String qGramTableFilename = getStringOption("q", null);
		
		Alphabet dnaAlphabet = Alphabet.getDnaAlphabet();
		// create list of patterns
		List<String> patternList = null;
		if (readPatternsFromFile) {
			patternList = FileUtils.readPatternFile(pattern);
		} else {
			patternList = new ArrayList<String>(1);
			patternList.add(pattern);
		}
		FiniteMemoryTextModel textModel = null;

		if (qGramTableFilename!=null) {
			try {
				textModel = SequenceUtils.buildTextModelFromQGramFile(qGramTableFilename);
			} catch (Exception e) {
				Log.errorln(e.toString());
				System.exit(1);
			}
		}
		if (textModel==null) {
			textModel = new IIDTextModel(dnaAlphabet.size());
		}
		for (String forwardPattern : patternList) {
			int maximalOverlap = -1;
			List<GeneralizedString> genStringList = new ArrayList<GeneralizedString>(1);
			GeneralizedString forwardGenString = Iupac.toGeneralizedString(forwardPattern); 
			genStringList.add(forwardGenString);
			if (considerReverse) {
				String reversePattern = Iupac.reverseComplementary(forwardPattern);
				genStringList.add(Iupac.toGeneralizedString(reversePattern));
			}
			// create cdfa
			CDFA cdfa = DFAFactory.build(dnaAlphabet, genStringList, 50000);
			// calculate clump size distribution
			ClumpSizeCalculator csc = new ClumpSizeCalculator(textModel, cdfa, forwardPattern.length());
			double[] clumpSizeDist = csc.clumpSizeDistribution(30, 1e-300);
			// calculate expected clump size
			double expectedClumpSize = 0.0;
			for (int i=1; i<clumpSizeDist.length; ++i) {
				expectedClumpSize+=clumpSizeDist[i]*i;
			}
			// calculate maximal possible overlap
			if (!considerReverse) {
				maximalOverlap = forwardPattern.length()-1;
				for (;maximalOverlap>0; --maximalOverlap) {
					if (forwardGenString.suffix(maximalOverlap).compatible(forwardGenString.prefix(maximalOverlap))) break;
				}
			}
			Log.println(Log.Level.STANDARD, "Format: >> pattern #generalized-strings expected-clump-size text-model-order maximal-overlap");
			Log.printf(Log.Level.STANDARD, ">> %s %d %e %d %d %n", forwardPattern, genStringList.size(), expectedClumpSize, textModel.order(), maximalOverlap);
		}
		return 0;
	}
}
